package com.xyq.fs.index;

import java.io.EOFException;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.TextField;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;

import com.xyq.fs.base.MyDirectoryS;
import com.xyq.fs.constants.FieldConstant;


/**
 * Extracts text from the leading pages of PDF files with PDFBox and adds it to
 * the index as the {@link FieldConstant#FILE_CONTENT} field.
 *
 * <p>Extraction is best-effort: a file that cannot be loaded or parsed yields
 * empty content instead of an exception. A fresh {@link PDFTextStripper} is
 * created for every extraction, so no page-range configuration is shared
 * between calls.
 */
public class IndexPdfFiles implements IndexFileInf {

	/** Last page read by {@link #getContent(File, String)}. */
	private static final int DEFAULT_END_PAGE = 30;

	/** Last page read by {@link #indexFile(File, String, String)}. */
	private static final int INDEX_END_PAGE = 20;

	/**
	 * Page limit for which empty content is replaced by {@link #UNAVAILABLE_MESSAGE}.
	 * NOTE(review): presumably the limit used by a preview caller outside this
	 * file - confirm against callers.
	 */
	private static final int PLACEHOLDER_END_PAGE = 10;

	/** Placeholder returned when nothing could be extracted for a 10-page request. */
	private static final String UNAVAILABLE_MESSAGE = "抱歉,文件暂时无法预览";

	/** Matches any run of whitespace; compiled once because it is applied to every indexed file. */
	private static final java.util.regex.Pattern WHITESPACE = java.util.regex.Pattern.compile("\\s+");

	/**
	 * Extracts the text of the first 30 pages of {@code file}.
	 *
	 * @param file       the PDF file to read
	 * @param upFileName not used by this implementation
	 * @return the trimmed text, or an empty string if the file could not be parsed
	 */
	public String getContent(File file, String upFileName) {
		return getContent(file, DEFAULT_END_PAGE);
	}

	/**
	 * Extracts the text of pages 1 through {@code endNum} of {@code file}.
	 *
	 * @param file   the PDF file to read
	 * @param endNum the last page (1-based, inclusive) to extract
	 * @return the trimmed text; an empty string if the file could not be parsed,
	 *         except that a request with {@code endNum == 10} that produced no
	 *         text returns a fixed placeholder message instead
	 */
	public String getContent(File file, int endNum) {
		String content = "";

		try (PDDocument document = PDDocument.load(file)) {
			// A stripper carries per-extraction state (the page range), so each
			// call gets its own instance instead of reconfiguring a shared one.
			PDFTextStripper stripper = new PDFTextStripper();
			stripper.setStartPage(1);
			stripper.setEndPage(endNum);
			content = stripper.getText(document).trim();
		} catch (Exception ignored) {
			// Deliberate best-effort: any load or parse failure (including a
			// truncated file raising EOFException) leaves the content empty so
			// that one bad PDF does not interrupt the caller.
		}

		if (content.isEmpty() && endNum == PLACEHOLDER_END_PAGE) {
			return UNAVAILABLE_MESSAGE;
		}
		return content;
	}

	/**
	 * Builds the document for {@code file} via {@code getSimDoc}, attaches the
	 * whitespace-stripped text of its first 20 pages as an unstored
	 * {@link TextField}, and passes the document to
	 * {@link MyDirectoryS#addDocument}.
	 *
	 * <p>The document is added even when no text could be extracted; the content
	 * field is then empty.
	 *
	 * @param file       the PDF file to index
	 * @param upFileName passed through to {@code getSimDoc}
	 * @param suffix     passed through to {@code getSimDoc}
	 */
	@Override
	public void indexFile(File file, String upFileName, String suffix) {
		Document doc = getSimDoc(file, upFileName, suffix);

		// getContent never throws an Exception (it degrades to ""), so no guard is needed here.
		String extracted = getContent(file, INDEX_END_PAGE);
		String content = WHITESPACE.matcher(extracted).replaceAll("");

		doc.add(new TextField(FieldConstant.FILE_CONTENT, content, Store.NO));
		MyDirectoryS.addDocument(doc);
	}

	/**
	 * Empty entry point; retained so the class's public members are unchanged.
	 *
	 * @param args ignored
	 * @throws EOFException never thrown; the clause is retained from the original signature
	 */
	public static void main(String[] args) throws EOFException {
		// No-op.
	}
}
